{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/javascript": [
       "\n",
       "require(['notebook/js/codecell'], function(codecell) {\n",
       "  codecell.CodeCell.options_default.highlight_modes[\n",
       "      'magic_text/x-csrc'] = {'reg':[/^%%microblaze/]};\n",
       "  Jupyter.notebook.events.one('kernel_ready.Kernel', function(){\n",
       "      Jupyter.notebook.get_cells().map(function(cell){\n",
       "          if (cell.cell_type == 'code'){ cell.auto_highlight(); } }) ;\n",
       "  });\n",
       "});\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import sys\n",
    "import os\n",
    "\n",
    "sys.path.append(os.path.abspath(\"../common\"))\n",
    "\n",
    "import math\n",
    "import time\n",
    "import numpy as np\n",
    "import cv2\n",
    "import pynq\n",
    "import dac_sdc\n",
    "import ctypes\n",
    "from multiprocessing import Process, Pipe, Queue, Event, Manager, Lock, Value\n",
    "\n",
    "# Team handle: provides the bitstream path, the image batches and result saving.\n",
    "team_name = 'iSmart'\n",
    "team = dac_sdc.Team(team_name, batch_size=64)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "got nn accelerator!\n"
     ]
    }
   ],
   "source": [
    "# Load the overlay from the team's bitstream and grab handles to its IP blocks.\n",
    "overlay = pynq.Overlay(team.get_bitstream_path())\n",
    "nn_ctrl = overlay.ultra_net_0\n",
    "dma = overlay.axi_dma_0\n",
    "\n",
    "# Allocator used below for the contiguous input/output buffers.\n",
    "xlnk = pynq.Xlnk()\n",
    "\n",
    "# Shared library providing the load_image and yolo routines called through ctypes.\n",
    "cfuns = ctypes.cdll.LoadLibrary(\"./load_image_pingpong3.so\")\n",
    "\n",
    "print('got nn accelerator!')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "BATCH_SIZE = team.batch_size\n",
    "\n",
    "# Dimensions used to allocate the input buffers.\n",
    "IMAGE_RAW_ROW = 360\n",
    "IMAGE_RAW_COL = 640\n",
    "# Dimensions handed to the C image loader (see load_image).\n",
    "IMAGE_ROW = 160\n",
    "IMAGE_COL = 320\n",
    "# Dimensions used to allocate the output buffers.\n",
    "GRID_ROW = 10\n",
    "GRID_COL = 20\n",
    "GRID_ROw = GRID_ROW  # backward-compatible alias for the original misspelled name\n",
    "X_SCALE = IMAGE_RAW_COL / IMAGE_COL\n",
    "Y_SCALE = IMAGE_RAW_ROW / IMAGE_ROW\n",
    "\n",
    "# Two input and two output buffers so that one pair can be streamed through the\n",
    "# DMA while the other pair is being filled / post-processed (see net).\n",
    "IN_BUFFER_SHAPE = (BATCH_SIZE, IMAGE_RAW_ROW, IMAGE_RAW_COL, 3)\n",
    "OUT_BUFFER_SHAPE = (BATCH_SIZE, GRID_ROW, GRID_COL, 6, 6)\n",
    "\n",
    "in_buffer0 = xlnk.cma_array(shape=IN_BUFFER_SHAPE, dtype=np.uint8, cacheable=1)\n",
    "in_buffer1 = xlnk.cma_array(shape=IN_BUFFER_SHAPE, dtype=np.uint8, cacheable=1)\n",
    "in_buffers = [in_buffer0, in_buffer1]\n",
    "out_buffer0 = xlnk.cma_array(shape=OUT_BUFFER_SHAPE, dtype=np.int32, cacheable=1)\n",
    "out_buffer1 = xlnk.cma_array(shape=OUT_BUFFER_SHAPE, dtype=np.int32, cacheable=1)\n",
    "out_buffers = [out_buffer0, out_buffer1]\n",
    "\n",
    "# use c functions to load image\n",
    "def load_image(image_paths, buff):\n",
    "    \"\"\"Load a batch of images into ``buff`` through the C helper library.\n",
    "\n",
    "    Args:\n",
    "        image_paths: iterable of image paths; each is converted with ``str``\n",
    "            and encoded as UTF-8 before being handed to C.\n",
    "        buff: destination array; its data pointer is passed to\n",
    "            ``cfuns.load_image``.\n",
    "\n",
    "    Returns:\n",
    "        Elapsed wall-clock time of the call, in seconds.\n",
    "    \"\"\"\n",
    "    started = time.time()\n",
    "    encoded_paths = [str(entry).encode('utf-8') for entry in image_paths]\n",
    "    n_images = len(encoded_paths)\n",
    "    # C array of char* holding one NUL-terminated path per image.\n",
    "    path_array = (ctypes.c_char_p * n_images)(*encoded_paths)\n",
    "    pixels_ptr = np.asarray(buff).ctypes.data_as(ctypes.c_char_p)\n",
    "    # NOTE(review): IMAGE_ROW/IMAGE_COL are passed here although the buffers are\n",
    "    # allocated with IMAGE_RAW_ROW/IMAGE_RAW_COL - confirm against the C source.\n",
    "    cfuns.load_image(path_array, pixels_ptr, n_images, IMAGE_ROW, IMAGE_COL, 3)\n",
    "    return time.time() - started\n",
    "    \n",
    "def sigmoid(x):\n",
    "    s = 1 / (1 + np.exp(-x))\n",
    "    return s\n",
    "\n",
    "def yolo(out_buffer, batch_n,_,result):\n",
    "    \"\"\"Post-process one accelerator output buffer with the C ``yolo`` routine.\n",
    "\n",
    "    Args:\n",
    "        out_buffer: array holding the accelerator output for one batch.\n",
    "        batch_n: number of images in the buffer to post-process.\n",
    "        _: ignored; kept so existing call sites keep working.\n",
    "        result: list extended in place with one 4-integer row per image\n",
    "            (presumably a bounding box - confirm against the C source).\n",
    "    \"\"\"\n",
    "    boxes = np.empty(shape=(batch_n, 4), dtype=np.int32)\n",
    "    cfuns.yolo(\n",
    "        out_buffer.ctypes.data_as(ctypes.c_char_p),\n",
    "        batch_n,\n",
    "        boxes.ctypes.data_as(ctypes.c_char_p),\n",
    "    )\n",
    "    result.extend(boxes.tolist())\n",
    "    \n",
    "# Pipeline state shared across successive calls to net().\n",
    "first_batch = True            # True until the first batch has been loaded\n",
    "which_buffer = 0              # index of the buffer pair net() uses next\n",
    "net_cnt = 0                   # number of accelerator runs started so far\n",
    "last_batch_size = BATCH_SIZE  # image count of the final batch (may be partial)\n",
    "\n",
    "def net(img_paths, result):\n",
    "    global first_batch\n",
    "    global which_buffer    \n",
    "    global net_cnt\n",
    "    global last_batch_size\n",
    "    if first_batch == True:\n",
    "        first_batch = False\n",
    "        which_buffer = 0\n",
    "        load_image(img_paths, in_buffers[which_buffer])\n",
    "        return\n",
    "    # count\n",
    "    net_cnt += 1\n",
    "    nn_ctrl.write(0x0, 0) # Reset\n",
    "    nn_ctrl.write(0x10, in_buffers[which_buffer].shape[0])\n",
    "    nn_ctrl.write(0x0, 1) # Deassert reset\n",
    "    \n",
    "    dma.recvchannel.transfer(out_buffers[which_buffer])\n",
    "    dma.sendchannel.transfer(in_buffers[which_buffer])\n",
    "    \n",
    "    # switch buffer\n",
    "    if which_buffer == 0:\n",
    "        which_buffer = 1\n",
    "    else:\n",
    "        which_buffer = 0\n",
    "\n",
    "    if img_paths is not None:\n",
    "        load_image(img_paths, in_buffers[which_buffer])\n",
    "    \n",
    "    # yolo \n",
    "    if net_cnt > 1:\n",
    "        yolo(out_buffers[which_buffer], BATCH_SIZE, 127 * 15, result)\n",
    "    \n",
    "    if img_paths is not None and len(img_paths) != BATCH_SIZE:\n",
    "        last_batch_size = len(img_paths)\n",
    "\n",
    "    dma.sendchannel.wait()\n",
    "    dma.recvchannel.wait()\n",
    "\n",
    "    # last batch \n",
    "    if img_paths is None:\n",
    "        yolo(out_buffers[(which_buffer + 1) % 2], last_batch_size, 127 * 15, result) # 8-bit"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Total time: 3.991365432739258 seconds\n",
      "Total energy: 22.8304806855 J\n",
      "images nums: 1000\n",
      "fps: 250.54082790753242\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Per-run bookkeeping; one entry is appended to each list below.\n",
    "num_rounds = 1\n",
    "time_list, fps_list, energy_list = [], [], []\n",
    "\n",
    "################################Inference##################################\n",
    "interval_time = 0\n",
    "load_time = 0\n",
    "total_time = 0\n",
    "total_energy = 0\n",
    "result = []\n",
    "team.reset_batch_count()\n",
    "\n",
    "rails = pynq.get_rails()\n",
    "\n",
    "start = time.time()\n",
    "recorder = pynq.DataRecorder(rails[\"5V\"].power)\n",
    "# Sample the 5V rail power while the pipeline runs (0.05 is presumably the\n",
    "# sampling interval in seconds - confirm against the pynq documentation).\n",
    "with recorder.record(0.05):\n",
    "    image_paths = team.get_next_batch()\n",
    "    while image_paths is not None:\n",
    "        net(image_paths, result)\n",
    "        image_paths = team.get_next_batch()\n",
    "    # Final call with None drains the pipeline.\n",
    "    net(image_paths, result)\n",
    "\n",
    "end = time.time()\n",
    "t = end - start\n",
    "\n",
    "# Energy = mean recorded power multiplied by the elapsed wall-clock time.\n",
    "energy = recorder.frame[\"5V_power\"].mean() * t\n",
    "\n",
    "total_time, total_energy = t, energy\n",
    "time_list.append(total_time)\n",
    "energy_list.append(total_energy)\n",
    "fps_list.append(len(result) / total_time)\n",
    "\n",
    "print(\"Total time:\", total_time, \"seconds\")\n",
    "print(\"Total energy:\", total_energy, \"J\")\n",
    "print('images nums: {}'.format(len(result)))\n",
    "print('fps: {}'.format(len(result) / total_time))\n",
    "print()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Persist the detections together with the measured time and energy.\n",
    "team.save_results_xml(result, total_time, total_energy)\n",
    "# Reset the xlnk allocator (presumably frees the contiguous buffers - confirm).\n",
    "xlnk.xlnk_reset()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
